********************************************************************************
*                                                                              *
*                 OBTAIN OCCUPATION AND EDUCATION DATA FROM LFS                *
*                                                                              *
********************************************************************************

// Sample window: first and last LFS survey year referenced further down
global minyear 1996
global maxyear 2021

// Variables requested from every 'old' LFS file. The list is built up in
// chunks (same names, same order); some names are listed in more than one
// upper/lower-case spelling.
// -- person identifiers
global varlist rinpersoons rinpersoon RINPERSOONS RINPERSOON
// -- survey timing and demographics
global varlist $varlist EbbAflJaar EbbAflKwartaal EbbHhbMV EbbAflLft EbbHhbBurgSt EbbAflCodeGebl EbbHhbImmigDat
// -- employment relation and occupation (ISCO 2008, original and revised)
global varlist $varlist EbbAflArbRel1 EbbTypISCO2008 EbbTypisco2008 EbbTypISCO2008R EbbTypisco2008r
// -- education
global varlist $varlist EbbAflSOI5HB EbbAflSOI3HB EbbAflHatField EbbAflHatLevel EbbAflHatYear
// -- union membership, contract type, accident indicator, municipality/region codes
global varlist $varlist EbbAlgLidVakb EbbAflDienstver1 EbbAlgOngeval EbbCdlGemeentecode EbbCdlAber92Ni
	
// Read each yearly 'old' LFS file (first sample year up to and including 2018),
// harmonise names and codes, and store the result as a temporary per-year file.
forvalues year = $minyear/2018 {

	// File naming differs by year: the version suffix changes, and files up to
	// 2011 have a blank between "EBB" and the year whereas later ones do not.
	local lfsfile ""
	if inlist(`year', 2007, 2008, 2009) {
		local lfsfile "EBB `year'V7.SAV"
	}
	else if inrange(`year', 1996, 1999) | inlist(`year', 2006, 2011) {
		local lfsfile "EBB `year'V8.SAV"
	}
	else if inlist(`year', 2000, 2001, 2005) {
		local lfsfile "EBB `year'V9.SAV"
	}
	else if inlist(`year', 2002, 2003, 2004, 2010) {
		local lfsfile "EBB `year'V10.SAV"
	}
	else if `year' == 2012 {
		local lfsfile "EBB`year'V11.SAV"
	}
	else if `year' == 2013 {
		local lfsfile "EBB`year'V9.SAV"
	}
	else if inlist(`year', 2014, 2015) {
		local lfsfile "EBB`year'V5.SAV"
	}
	else if `year' == 2016 {
		local lfsfile "EBB`year'V3.SAV"
	}
	else if `year' == 2017 {
		local lfsfile "EBB`year'V2.SAV"
	}
	else if `year' == 2018 {
		local lfsfile "EBB`year'V1.SAV"
	}

	// Only import when a file name is known for this year
	if "`lfsfile'" != "" {
		import spss $varlist using "G:\Arbeid\EBB\\`year'\\`lfsfile'", case(lower) clear
	}

	// Remove records whose identifier-type code is "E"
	drop if rinpersoons == "E"

	** FILL THE MISSING OCCUPATIONAL CODES FROM THE REVISED (by STATISTICS NETHERLANDS) ISCO VARIABLE
	// capture: not every year carries the revised variable, in which case these are skipped.
	// Step 1: empty / "XXXX" / "0000" codes take the revised code when one exists.
	capture replace ebbtypisco2008 = ebbtypisco2008r if inlist(ebbtypisco2008, "", "XXXX", "0000") & ebbtypisco2008r != ""
	// Step 2: any remaining disagreement is resolved in favour of the revised code.
	capture replace ebbtypisco2008 = ebbtypisco2008r if ebbtypisco2008 != ebbtypisco2008r & ebbtypisco2008r != ""

	// Harmonised names (old names and new names pair up position by position)
	rename (ebbafljaar ebbaflkwartaal ebbhhbmv ebbafllft ebbhhbburgst ebbaflcodegebl ///
	        ebbalglidvakb ebbtypisco2008 ebbafldienstver1 ebbaflarbrel1 ///
	        ebbaflhatlevel ebbaflhatfield ebbaflhatyear ebbcdlgemeentecode) ///
	       (lfsyear lfsquarter gender age maritalstatus countryofbirth ///
	        unionm isco08 jobtype workerstatus ///
	        educlevel educfield educcompl rmun)

	// The SOI education codes are not available in every year, hence capture
	capture rename ebbaflsoi5hb educ_soi5
	capture rename ebbaflsoi3hb educ_soi3
	capture destring rmun, force replace

	// 0/1 indicators derived from the categorical codes
	generate male              = (gender == 1)
	generate married           = (maritalstatus == 1)
	generate divorced          = (maritalstatus == 2)
	generate widow             = (maritalstatus == 3)
	generate contract_perm     = inlist(jobtype, 1, 2)
	generate contract_selfempl = (jobtype == 5)
	generate contract_unknown  = (jobtype == 7)
	generate union_member      = (unionm == 1)
	generate union_unknown     = (unionm == .)

	// The raw categorical variables are no longer needed (capture: some may be absent)
	foreach rawvar in rinpersoons gender maritalstatus jobtype unionm ebbalgongeval countryofbirth {
		capture drop `rawvar'
	}

// 	destring rinpersoon, force replace
// 	format rinpersoon %10.0f
	// Convert codes to numeric; force turns non-numeric codes into missing
	destring isco08 educlevel educfield, force replace
	capture destring educ_soi3, force replace
	capture destring educ_soi5, force replace

	save "H:\Robots and Workers\Data\LFSX_`year'.dta", replace

}

// Stack the temporary per-year files into one dataset and delete them afterwards
clear all
forvalues year = $minyear/2018 {
	local yearfile "H:\Robots and Workers\Data\LFSX_`year'.dta"
	append using "`yearfile'"
	erase "`yearfile'"
}

// Attach lfsperiod from the year-quarter lookup file; unmatched lookup rows are discarded
merge m:1 lfsyear lfsquarter using  "H:\Robots and Workers\Data\LFS_periodmatch.dta", nogenerate keep(master match)

// Fill missing occupation/education codes from the same person's neighbouring
// interviews, provided the two interviews are at most 5 periods apart
sort rinpersoon lfsyear lfsquarter lfsperiod
foreach fillvar in isco08 educ_soi3 educ_soi5 {
	// Carry forward: replace works through the rows in order, so a filled value
	// is available to the next row within this single pass
	replace `fillvar' = `fillvar'[_n-1] if rinpersoon == rinpersoon[_n-1] & `fillvar' == . & (lfsperiod - lfsperiod[_n-1]) <= 5
	// Carry backward: each pass moves a value one row up, so repeat 15 times
	forvalues pass = 1/15 {
		replace `fillvar' = `fillvar'[_n+1] if rinpersoon == rinpersoon[_n+1] & `fillvar' == . & (lfsperiod[_n+1] - lfsperiod) <= 5
	}
}

// Keep one observation per person-year: sort the latest quarter first, then
// drop all but the first record of each rinpersoon-lfsyear pair
gsort -lfsyear -lfsquarter rinpersoon
duplicates drop rinpersoon lfsyear, force


// Remove variables that are not needed downstream (capture: some may not exist)
foreach dropvar in lfsperiod educ_soi educlevel_soi5 educlevel educfield educcompl educlevel_alt rti rtiw nr_cog_anal nr_cog_pers r_cog r_man nr_man_phys nr_man_pers nr_man_persInt age ebbhhbimmigdat workerstatus ebbtypisco2008r male married divorced widow contract_perm contract_selfempl contract_unknown union_member union_unknown {
	capture drop `dropvar'
}

// Apply variable labels and store the harmonised 'old' LFS
do "H:\Robots and Workers\Do-files\Variable labels.do"
save "H:\Robots and Workers\Data\LFSX_before.dta", replace


********************************************************************************
*                                                                              *
*      OBTAIN 'ONLY' OCCUPATION AND EDUCATION DATA FROM NEW VERSION OF LFS     *
*                                                                              *
********************************************************************************

// Variable list for the 'new' LFS
// NOTE(review): the loop below spells out its own variable list and does not read this global -- confirm whether it is still needed
	global varlist rinpersoon rinpersoons ebbafljaar ebbaflkwartaal EBBTW1ISCO2008V OPLNIVSOI2016AGG1HB OPLNIVSOI2016AGG2HB


// Read each yearly 'new' LFS file (2003 up to the last sample year), harmonise
// it and store the result as a temporary per-year file
quietly forvalues year = 2003/$maxyear {
	noisily _dots `year' 0

	// Variables taken from every file, spelled as in the source files
	local newvars RINPERSOONS RINPERSOON EBBAFLJAAR EBBAFLKWARTAAL EBBTW1ISCO2008V OPLNIVSOI2016AGG1HB OPLNIVSOI2016AGG2HB EBBAFLGEMCWRKAJJJJ

	if `year' == 2010 {
		// 2010 is read from a converted Stata file; select and lower-case the names by hand
		use "G:\Arbeid\EBBnw\geconverteerde data\EBBNW`year'V4.dta", clear
		keep `newvars'
		rename `newvars', lower
	}
	else {
		// All other years are SPSS files that differ only in their version suffix
		local ver ""
		if inrange(`year', $minyear, 2009) | inrange(`year', 2011, 2018) {
			local ver 5
		}
		else if `year' == 2019 {
			local ver 4
		}
		else if inlist(`year', 2020, 2021) {
			local ver 2
		}
		// Only import when a version is known for this year
		if "`ver'" != "" {
			import spss `newvars' using "G:\Arbeid\EBBnw\EBBnw`year'V`ver'.SAV", case(lower) clear
		}
	}

	// Remove records whose identifier-type code is "E"
	drop if rinpersoons == "E"

	// Harmonised names; the education renames are allowed to fail
	rename (ebbafljaar ebbaflkwartaal ebbtw1isco2008v ebbaflgemcwrkajjjj) ///
	       (lfsyear lfsquarter isco08 wmun)
	capture rename oplnivsoi2016agg1hb educ_soi3
	capture rename oplnivsoi2016agg2hb educ_soi5

	foreach rawvar in rinpersoons {
		capture drop `rawvar'
	}

// 	destring rinpersoon, force replace
// 	format rinpersoon %10.0f
	// Convert codes to numeric; force turns non-numeric codes into missing
	destring isco08, force replace
	destring wmun, force replace
	capture destring educ_soi3, force replace
	capture destring educ_soi5, force replace
	save "H:\Robots and Workers\Data\LFSX_extnew_`year'.dta", replace

}


// Stack the temporary per-year files into one dataset and delete them afterwards
clear all
forvalues year = 2003/$maxyear {
	local yearfile "H:\Robots and Workers\Data\LFSX_extnew_`year'.dta"
	append using "`yearfile'"
	erase "`yearfile'"
}

// Year and quarter must be numeric before they can serve as merge keys
destring lfsyear, force replace
destring lfsquarter, force replace
// Attach lfsperiod from the year-quarter lookup file; unmatched lookup rows are discarded
merge m:1 lfsyear lfsquarter using  "H:\Robots and Workers\Data\LFS_periodmatch.dta", nogenerate keep(master match)

// Fill missing codes from the same person's neighbouring interviews, provided
// the two interviews are at most 5 periods apart
sort rinpersoon lfsyear lfsquarter lfsperiod
foreach fillvar in isco08 educ_soi3 educ_soi5 wmun {
	// Carry forward: replace works through the rows in order, so a filled value
	// is available to the next row within this single pass
	replace `fillvar' = `fillvar'[_n-1] if rinpersoon == rinpersoon[_n-1] & `fillvar' == . & (lfsperiod - lfsperiod[_n-1]) <= 5
	// Carry backward: each pass moves a value one row up, so repeat 15 times
	forvalues pass = 1/15 {
		replace `fillvar' = `fillvar'[_n+1] if rinpersoon == rinpersoon[_n+1] & `fillvar' == . & (lfsperiod[_n+1] - lfsperiod) <= 5
	}
}

// Keep one observation per person-year: sort the latest quarter first, then
// drop all but the first record of each rinpersoon-lfsyear pair
gsort -lfsyear -lfsquarter rinpersoon
duplicates drop rinpersoon lfsyear, force

// Copy of the occupation code under a separate name, for use when this file is merged elsewhere
generate isco08_extnew = isco08

// Apply variable labels, shrink storage types and store the harmonised 'new' LFS
do "H:\Robots and Workers\Do-files\Variable labels.do"
compress
save "H:\Robots and Workers\Data\LFSX_extnew.dta", replace



********************************************************************************
*                                                                              *
*              MERGE THE TWO LFSs TOGETHER AND MERGE TO WORKER DATA            *
*                                                                              *
********************************************************************************

// Start from the 'old' LFS, restricted to the identifiers and the occupation/education codes
use "H:\Robots and Workers\Data\LFSX_before.dta", clear
keep rinpersoon lfsyear lfsquarter isco08 educ_soi3 educ_soi5

// Add the 'new' LFS at person-year level. With update, education codes missing in
// the old LFS are filled from the new one; all merge outcomes are retained.
// NOTE(review): keepusing() lists isco08_extnew but neither isco08 nor lfsquarter, so
// person-years found only in the new LFS end up with both missing -- confirm this is intended.
merge 1:1 rinpersoon lfsyear using "H:\Robots and Workers\Data\LFSX_extnew.dta", ///
	keep(master using match match_update match_conflict) update ///
	keepusing(educ_soi5 educ_soi3 isco08_extnew) nogenerate
sort rinpersoon lfsyear lfsquarter

// For every reference year, store each person's most recent LFS record from
// strictly BEFORE that year in a temporary file
forvalues year = 2009/$maxyear {
	local snapshot "H:\Robots and Workers\Data\LFS_before_temp`year'.dta"
	preserve
		keep if lfsyear < `year'
		generate year = `year'
		// Latest record first, so that the de-duplication keeps it
		gsort -lfsyear -lfsquarter rinpersoon
		duplicates drop rinpersoon, force
		save "`snapshot'", replace
	restore
}

// Load the worker data and attach, reference year by reference year, the stored
// pre-year LFS records; the temporary files are deleted once merged
use "H:\Robots and Workers\Data\Workers_PS.dta", clear
forvalues year = 2009/$maxyear {
	local snapshot "H:\Robots and Workers\Data\LFS_before_temp`year'.dta"
	// update: values that are still missing are filled from the file being merged in
	merge m:1 rinpersoon year using "`snapshot'", nogenerate keep(master match match_update match_conflict) update
	erase "`snapshot'"
}

// Distance between the worker-data year and the LFS interview year;
// rows without an LFS record are removed
generate lfsyeardiff = year - lfsyear
drop if lfsyeardiff == .

// Recode placeholder codes to missing
replace isco08    = . if inlist(isco08, 0, 9999)
replace educ_soi3 = . if educ_soi3 == 9
replace educ_soi5 = . if educ_soi5 == 99

// Attach the RTI / task measures; the lookup files use ISCO08Code as key name,
// so the occupation variable is renamed for the duration of the merges
rename isco08 ISCO08Code
forvalues year = $minyear/$maxyear {
	merge m:1 ISCO08Code lfsyear using "H:\Base Files\RTI\onet-ISCO08RTI_LFSX_`year'.dta", ///
		nogenerate keep(master match match_update match_conflict) update ///
		keepusing(RTI* nr_cog_anal nr_cog_pers r_cog r_man nr_man_phys nr_man_pers nr_man_persInt)
}
rename ISCO08Code isco08
rename (RTI RTIw) (rti rtiw)
drop rtiw

// Standardise rti to mean 0 and standard deviation 1 over the current sample
summarize rti
replace rti = (rti - r(mean)) / r(sd)

// Attach the blue-collar indicator by occupation; unmatched lookup rows are discarded
merge m:1 isco08 using "H:\PR8912\Bluecollar\bluecollar_new.dta", keep(master match) nogenerate keepusing(bluecollar)

	// Skill dummies from the 3-category education code (missing when the code is missing)
	g lowskilled = educ_soi3 == 1 if educ_soi3!=.
	g lowmedskilled = educ_soi3 == 1 | educ_soi3 == 2 if educ_soi3!=.


	*Generate variables for most affected dummy
	// Routine occupation: rti of at least 1 (missing when rti is missing)
	g routine = rti>=1 if rti!=.

	// Most affected: routine AND blue-collar (missing when either input is missing)
	g meffect = routine==1 & bluecollar==1 if routine!=. & bluecollar!=.

	// FIX: the interactions below used to reference meffect_rb, a variable this
	// do-file never creates (the indicator generated above is called meffect),
	// so the run stopped with "variable meffect_rb not found" unless the worker
	// data happened to carry such a variable. Use meffect_rb when it exists under
	// exactly that name (previous behaviour), otherwise fall back to meffect.
	capture confirm variable meffect_rb, exact
	if _rc {
		local mostaff meffect
	}
	else {
		local mostaff meffect_rb
	}
	display as text "Most-affected interactions are built from: `mostaff'"

	// Interactions of robots and compete with the most-affected indicator and its complement
	g robotsXmeffect = robots*`mostaff'
	g robotsXnonmeffect = robots*(1-`mostaff')
	g competeXmeffect = compete*`mostaff'
	g competeXnonmeffect = compete*(1-`mostaff')
	
	// Same construction, additionally restricted to workers older than 47
	g meffect_rbold =  routine==1 & bluecollar==1 & age>47 if routine!=. & bluecollar!=. & age!=.
	g robotsXmeffect_rbold = robots*meffect_rbold
	g robotsXnonmeffect_rbold = robots*(1-meffect_rbold)
	g competeXmeffect_rbold = compete*meffect_rbold
	g competeXnonmeffect_rbold = compete*(1-meffect_rbold)

// Store the worker data enriched with LFS occupation/education and the derived indicators
save "H:\Robots and Workers\Data\Workers_PS_LFS.dta", replace
